Title
%config Completer.use_jedi = False #for autocompletion feature
import pandas as pd
import pandas_datareader.data as web
import numpy as np
import statsmodels.api as sm
import scipy.stats as scs
import yfinance as yf
import sklearn.mixture as mix
from pytrends.request import TrendReq
import time
import datetime
import matplotlib as mpl
from matplotlib import cm
import matplotlib.pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator
%matplotlib inline
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import plotly
import plotly.io as pio
pio.templates.default = "none"
from tqdm import tqdm
import ssl
# SECURITY NOTE(review): this replaces the ssl module's default HTTPS context
# factory with the unverified one, so HTTPS requests that rely on that default
# skip certificate verification for the rest of the session. Presumably a
# workaround for a missing/broken local CA bundle when fetching the page
# below — confirm, and prefer fixing the certificate store instead.
ssl._create_default_https_context = ssl._create_unverified_context
# pd.read_html returns a list of DataFrames, one per HTML table it parses
# from the page.
table=pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
# Keep only the first table on the page — presumably the S&P 500 constituents
# list; verify if the page layout changes.
df_SP500 = table[0]
# Preview of the first rows (only rendered when this is the last expression of
# a notebook cell).
df_SP500.head()
# Ticker symbol -> short display name used in the plot legends.
label_dictionary = {
    'KO': 'Cola',
    'PEP': 'Pepsi',
    'LEG': 'Leggett',
    'HD': 'Home Depot',
}

# Requested price-history window: 1980-01-01 up to the moment this cell runs.
start = datetime.datetime(1980, 1, 1)
end = datetime.datetime.today()

factors = list(label_dictionary)

# ticker -> price-history DataFrame. Only tickers that actually returned rows
# are stored, so downstream code should iterate factordict, not factors.
factordict = dict()
for f in tqdm(factors):
    try:
        # auto_adjust=False keeps the separate 'Adj Close' column that the
        # later cells read, regardless of the installed yfinance default.
        data = yf.download(f, start=start, end=end, progress=False,
                           auto_adjust=False)
    except Exception as exc:
        # Best effort: report the failure and carry on with the other tickers
        # instead of silently swallowing it.
        tqdm.write(f"Download failed for {f}: {exc!r}")
        continue

    if data is None or data.empty:
        # An empty frame would only fail later on `.index[0]` / `.values[0]`.
        tqdm.write(f"No price data returned for {f}; skipping")
        continue

    if isinstance(data.columns, pd.MultiIndex):
        # Later cells look fields up by plain name ('Adj Close', 'High',
        # 'Low'); collapse a (field, ticker) column MultiIndex, which some
        # yfinance versions return even for a single ticker.
        data.columns = data.columns.get_level_values(0)

    factordict[f] = pd.DataFrame(data)
import plotly.graph_objs as go

# Overlay the cumulative log return of every downloaded ticker on one chart.
fig = go.Figure()

# First available date of each downloaded, non-empty price history. Built from
# factordict rather than factors so a ticker whose download failed cannot
# raise a KeyError here.
dates = [frame.index[0] for frame in factordict.values() if len(frame.index)]
# Fixed common start; switch to max(dates) to begin every series at the
# latest first-available date instead.
start_date = '1980-01-01' #max(dates)

for f, frame in factordict.items():
    # Keep only rows strictly after the chosen start date.
    factor = frame[frame.index > start_date]
    if factor.empty:
        # Nothing to normalise against; skip instead of failing on [0].
        continue
    adj_close = factor['Adj Close'].values
    fig.add_trace(go.Scattergl(
        x=factor.index,
        # Log of price relative to the first plotted day, so every line
        # starts at 0.
        y=np.log(adj_close / adj_close[0]),
        name=label_dictionary[f],
        mode="lines"))

fig.update_layout(title={'text': 'Stock comparison'})
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
# Adjusted-close prices, one column per ticker; dropna() keeps only the dates
# on which every ticker has a value.
dd = {ticker: frame['Adj Close'] for ticker, frame in factordict.items()}
df = pd.DataFrame(dd).dropna()
df.head()

# Daily highs, same layout and same NaN-row filtering.
dfH = pd.DataFrame(
    {ticker: frame['High'] for ticker, frame in factordict.items()}
).dropna()

# Daily lows, same layout and same NaN-row filtering.
dfL = pd.DataFrame(
    {ticker: frame['Low'] for ticker, frame in factordict.items()}
).dropna()
def calculateSpan(price):
    """Return, for each position, how long the value there has been the low.

    For position ``i`` the result is the number of consecutive positions
    ending at ``i`` (inclusive) whose values are all ``>= price[i]``.
    Equivalently it is ``i - j``, where ``j`` is the nearest earlier position
    holding a strictly smaller value, or ``i + 1`` when no earlier value is
    smaller.

    Args:
        price: Sequence of comparable numbers (list, NumPy array or pandas
            Series). Values are read by position, so a Series with a
            non-integer index (e.g. dates) is handled correctly.

    Returns:
        A list of ints with the same length as ``price``; an empty list for
        empty input.
    """
    # Positional view of the data: plain integer indexing on a date-indexed
    # Series relies on a deprecated pandas fallback.
    values = np.asarray(price)
    n = len(values)
    if n == 0:
        return []

    # The first element has nothing before it, so its span is 1.
    S = [1] * n
    # Stack of candidate positions; their values are strictly increasing from
    # bottom to top.
    st = [0]
    for i in range(1, n):
        # Discard earlier positions whose value is not smaller than the
        # current one; they can never bound a later span.
        while st and values[st[-1]] >= values[i]:
            st.pop()
        # Empty stack: no earlier value is smaller, the span reaches back to
        # the start. Otherwise it reaches back to just after the stack top.
        S[i] = i + 1 if not st else i - st[-1]
        st.append(i)
    return S
def minmax_scale(df):
    """Rescale ``df`` as ``(df - min) / (max - min)`` using np.min and np.max.

    No guard is applied when max equals min; the division is performed as is.
    """
    lowest = np.min(df)
    value_range = np.max(df) - lowest
    shifted = df - lowest
    return shifted / value_range
def z_scale(df):
    """Standardise ``df``: subtract np.average(df), divide by np.std(df).

    np.std is called with its defaults, and no guard is applied for a zero
    standard deviation.
    """
    centre = np.average(df)
    spread = np.std(df)
    return (df - centre) / spread
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Minimum absolute daily return for a day to count as an up or a down move.
gap = 0.001


def _add_line(fig, x, y, name, row):
    """Add a WebGL line trace called *name* to subplot row *row* of *fig*."""
    fig.add_trace(go.Scattergl(x=x, y=y, name=name, mode="lines"),
                  row=row, col=1)


def _rolling_vol(returns):
    """Return the 21-row rolling std of *returns*, scaled by sqrt(21)."""
    return returns.rolling(21).std() * 21 ** 0.5


# One six-row dashboard per ticker (first five columns of df at most).
for var in df.columns[:5]:
    df_test = df[var]
    dH = dfH[var]
    dL = dfL[var]
    if df_test.empty:
        # Nothing to normalise against or plot.
        continue

    # Daily returns and the up/down selections are computed once and reused
    # by several traces.
    returns = df_test.pct_change()
    up = returns >= gap
    down = returns < -gap
    high_returns = dH.pct_change()
    high_up = high_returns >= gap
    low_returns = dL.pct_change()
    low_down = low_returns < -gap

    fig = make_subplots(rows=6, cols=1,
                        shared_xaxes=True,
                        vertical_spacing=0.02)

    # Row 1: log of price relative to the first observation. `.iloc[0]` is
    # explicit positional access; plain `[0]` on a date-indexed Series relies
    # on a deprecated fallback.
    _add_line(fig, df_test.index, np.log(df_test / df_test.iloc[0]),
              label_dictionary[var], 1)

    # Row 2: daily returns, split into up days and down days.
    _add_line(fig, df_test.index[up], returns[up], '+ve daily returns', 2)
    _add_line(fig, df_test.index[down], returns[down], '-ve daily returns', 2)

    # Row 3: rolling volatility. For the filtered series the window spans 21
    # qualifying observations, not 21 consecutive trading days.
    _add_line(fig, df_test.index[up], _rolling_vol(returns[up]),
              '21-day +ve volatility', 3)
    _add_line(fig, df_test.index[down], _rolling_vol(returns[down]),
              '21-day -ve volatility', 3)
    _add_line(fig, dH.index[high_up], _rolling_vol(high_returns[high_up]),
              '21-day +ve volatility H', 3)
    _add_line(fig, dL.index[low_down], _rolling_vol(low_returns[low_down]),
              '21-day -ve volatility L', 3)
    _add_line(fig, df_test.index, _rolling_vol(returns),
              '21-day volatility', 3)

    # Row 4: drawdown relative to the running maximum of the price.
    running_max = df_test.cummax()
    _add_line(fig, df_test.index, (df_test - running_max) / running_max,
              'Drawdown', 4)
    # Disabled experiment ('Drawdown modified'): price minus the running
    # maximum of the highs, divided by the running maximum of the lows:
    #   (df_test - dH.cummax()) / dL.cummax()

    # Row 5: raw calculateSpan output for the price series.
    S = calculateSpan(df_test)
    _add_line(fig, df_test.index, S, 'Drawdown days', 5)

    # Row 6: product of the min-max scaled and z-scored span.
    _add_line(fig, df_test.index, minmax_scale(S) * z_scale(S),
              'Drawdown days scaled', 6)

    fig.update_layout(height=800, width=1000,
                      title_text=var)
    # Rows 1-4 show ratios, so format their y axes as percentages.
    for row in range(1, 5):
        fig.update_yaxes(tickformat=',.0%', row=row, col=1)
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    fig.show()